# -*- coding: utf-8 -*-
import random
import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from zc_core.model.items import Box
from zc_core.util.http_util import retry_request
from cmcc.rules import *
from cmcc.util.login import SeleniumLogin
from zc_core.spiders.base import BaseSpider


class SkuSpider(BaseSpider):
    """Crawl the category list and the SKU listing pages of b2bjoy.10086.cn.

    Flow: ``start_requests`` fetches the index page, ``parse_catalog`` emits
    the categories and requests page 1 of the SKU list for every level-3
    category, ``parse_sku_page`` emits page 1 and fans out to the remaining
    pages, and ``parse_sku`` emits each of those remaining pages.
    """

    name = 'sku'
    custom_settings = {
        'CONCURRENT_REQUESTS': 8,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 8,
        'CONCURRENT_REQUESTS_PER_IP': 8,
    }
    # Frequently used URLs.
    index_url = 'http://b2bjoy.10086.cn/oscp/home/main.html'
    # Placeholders, in order: category id, page number, page size.
    sku_list_url = 'http://b2bjoy.10086.cn/oscp/goods/category/goodsList.html?id={}&sort=salePrice_asc&pageNumber={}&pageSize={}'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider.

        Args:
            batchNo: batch identifier, forwarded unchanged to ``BaseSpider``.
            *args: extra positional arguments forwarded to ``BaseSpider``.
            **kwargs: extra keyword arguments forwarded to ``BaseSpider``.
        """
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Number of SKUs requested per listing page.
        self.page_size = 100

        # Proxy endpoint and session cookies attached to every request.
        # NOTE(review): these are hard-coded and look like manually rotated
        # values - consider loading them from settings or from SeleniumLogin.
        self.proxy = 'http://114.235.187.92:4258'
        self.cookies = {'JSESSIONID': '23AFE7FBF0040E481E82409648198E74',
                        'centralSessionId': '_userId-c1db073ffef143c2822e8dfb024c2192_75948cc57dc84e68bef1e31170707416'}

    def start_requests(self):
        """Yield the initial request for the index page.

        Nothing is yielded (and an error is logged) when no cookies are set.
        """
        if not self.cookies:
            self.logger.error('init cookie failed...')
            return
        self.logger.info('init cookie: %s', self.cookies)

        yield Request(
            url=self.index_url,
            meta={
                'proxy': self.proxy,
                'reqType': 'catalog',
                'batchNo': self.batch_no,
                'init_cookies': self.cookies,
            },
            cookies=self.cookies,
            callback=self.parse_catalog,
            errback=self.error_back,
            dont_filter=True
        )

    def _sku_list_request(self, cat_id, page, batch_no, callback):
        """Build the request for one page of a category's SKU listing.

        Args:
            cat_id: category id substituted into ``sku_list_url``.
            page: page number to fetch.
            batch_no: batch number carried along in the request meta.
            callback: spider method that handles the response.

        Returns:
            The ``Request`` for that listing page.
        """
        return Request(
            url=self.sku_list_url.format(cat_id, page, self.page_size),
            cookies=self.cookies,
            callback=callback,
            errback=self.error_back,
            meta={
                'proxy': self.proxy,
                'reqType': 'sku',
                'batchNo': batch_no,
                'pageSize': self.page_size,
                'page': page,
                'catId': cat_id,
            },
            dont_filter=True
        )

    def _sku_box(self, response, label):
        """Parse the SKUs of a listing page and wrap them in a ``Box``.

        Args:
            response: listing page response; ``catId`` and ``page`` are read
                from its meta for logging.
            label: prefix of the log line written when SKUs were found.

        Returns:
            A ``Box('sku', ...)`` holding the parsed SKUs, or ``None`` when
            the page produced no SKUs.
        """
        meta = response.meta
        cat_id = meta.get('catId')
        cur_page = meta.get('page')
        # ``parse_sku`` here is the module-level parser, not the method below.
        sku_list = parse_sku(response)
        if not sku_list:
            self.logger.info('分页为空: cat=%s, page=%s', cat_id, cur_page)
            return None
        self.logger.info('%s: cat=%s, page=%s, cnt=%s', label, cat_id, cur_page, len(sku_list))
        return Box('sku', self.batch_no, sku_list)

    # Categories
    def parse_catalog(self, response):
        """Emit the parsed categories and request page 1 of each level-3 one.

        Args:
            response: response of the index page.

        Yields:
            One ``Box('catalog', ...)`` with all categories, followed by one
            SKU-list request per category whose ``level`` is 3.
        """
        batch_no = response.meta.get('batchNo')
        # ``parse_catalog`` here is the module-level parser, not this method.
        cats = parse_catalog(response)
        if not cats:
            # Guard before shuffling: random.shuffle raises TypeError on None.
            self.logger.warning('catalog is empty, nothing to crawl')
            return

        # Randomise the crawl order of the categories (in place).
        random.shuffle(cats)
        self.logger.info('品类: count[%s]', len(cats))
        yield Box('catalog', self.batch_no, cats)

        for cat in cats:
            if cat and cat.get('level') == 3:
                yield self._sku_list_request(
                    cat.get('catalogId'), 1, batch_no, self.parse_sku_page)

    # Handle the first page of a SKU list
    def parse_sku_page(self, response):
        """Emit the SKUs of page 1 and request every remaining page.

        Args:
            response: response of page 1 of a category's SKU list.

        Yields:
            A ``Box('sku', ...)`` when page 1 contains SKUs, then one request
            per page from 2 up to the parsed total page count.
        """
        meta = response.meta
        batch_no = meta.get('batchNo')
        cat_id = meta.get('catId')

        # SKUs on the first page.
        box = self._sku_box(response, '清单1')
        if box is not None:
            yield box

        # Remaining pages; ``parse_sku_page`` here is the module-level parser.
        pages = parse_sku_page(response)
        self.logger.info('总页数: cat=%s, total=%s', cat_id, pages)
        if pages and pages > 1:
            for page in range(2, pages + 1):
                yield self._sku_list_request(cat_id, page, batch_no, self.parse_sku)

    # Handle a follow-up page of a SKU list
    def parse_sku(self, response):
        """Emit the SKUs of a listing page after the first one.

        Args:
            response: response of a SKU listing page.

        Yields:
            A ``Box('sku', ...)`` when the page contains SKUs.
        """
        box = self._sku_box(response, '清单2')
        if box is not None:
            yield box
